Get the data
# NY State DOH 2019 vital-statistics pages that are scraped below:
# `url` (table 24) feeds induced_abortion_finance and
# `url1` (table 13) feeds live_birth_finance.
url <- "https://www.health.ny.gov/statistics/vital_statistics/2019/table24.htm"
url1 <- "https://www.health.ny.gov/statistics/vital_statistics/2019/table13.htm"

# Download a page, parse every HTML table on it without promoting any row to
# a header (so the page's own header rows stay in the data as ordinary rows),
# keep the first table, and normalise its column names.
read_first_table <- function(page_url) {
  page_tables <- html_table(read_html(page_url), header = FALSE)
  janitor::clean_names(first(page_tables))
}

induced_abortion_finance <- read_first_table(url)
live_birth_finance <- read_first_table(url1)
Data cleaning
# Tidy the scraped live-birth table into one row per area with numeric counts
# for each financial-coverage category.
#
# Steps:
#   1. Drop columns 4 and 6-8 by position and name the six that remain.
#   2. Keep rows 4 and 6-11 only.
#      NOTE(review): these positions are assumed to be the summary rows plus
#      the five NYC counties -- confirm against the published table.
#   3. Relabel counties with their borough names.
#   4. Strip thousands separators and convert the count columns to numeric.
clean_lb_finance <- live_birth_finance %>%
  select(-c(4, 6:8)) %>%
  purrr::set_names(
    c("borough", "total", "Medicaid", "Self_Pay", "Other_Insurance", "Not_Stated")
  ) %>%
  slice(4, 6:11) %>%
  mutate(
    borough = str_replace(borough, "Kings", "Brooklyn"),
    # Exact match on purpose: a substring replace would also rewrite any
    # longer label that merely starts with "New York".
    borough = ifelse(borough == "New York", "Manhattan", borough),
    borough = str_replace(borough, "Richmond", "Staten_Island"),
    # Remove *every* comma before parsing; removing only the first one would
    # turn a value such as "1,234,567" into NA.
    across(
      c(total, Medicaid, Self_Pay, Other_Insurance, Not_Stated),
      ~ as.numeric(str_remove_all(.x, ","))
    )
  )
# Tidy the scraped induced-abortion table into one row per area with numeric
# counts for each financial-coverage category.
#
# Steps:
#   1. Keep columns 1-3, 5, 6 and 8 by position and name them.
#      NOTE(review): Other_Insurance precedes Self_Pay here, which is the
#      reverse of the live-birth table -- confirm this matches the page layout.
#   2. Keep rows 4 and 6-11 only.
#      NOTE(review): these positions are assumed to be the summary rows plus
#      the five NYC counties -- confirm against the published table.
#   3. Relabel counties with their borough names.
#   4. Strip thousands separators and convert the count columns to numeric.
clean_ia_finance <- induced_abortion_finance %>%
  select(c(1:3, 5, 6, 8)) %>%
  purrr::set_names(
    c("borough", "total", "Medicaid", "Other_Insurance", "Self_Pay", "Not_Stated")
  ) %>%
  slice(4, 6:11) %>%
  mutate(
    borough = str_replace(borough, "Kings", "Brooklyn"),
    # Exact match on purpose: a substring replace would also rewrite any
    # longer label that merely starts with "New York".
    borough = ifelse(borough == "New York", "Manhattan", borough),
    borough = str_replace(borough, "Richmond", "Staten_Island"),
    # Remove *every* comma before parsing; removing only the first one would
    # turn a value such as "1,234,567" into NA.
    across(
      c(total, Medicaid, Other_Insurance, Self_Pay, Not_Stated),
      ~ as.numeric(str_remove_all(.x, ","))
    )
  )
# Combine live births and induced abortions per borough and express each
# financial-coverage category as induced abortions per 1,000 live births.
#
# The join suffixes mark where each count came from (`_lb` = live births,
# `_ia` = induced abortions); clean_names() then lower-cases the column names.
# The rate divides abortions by live births. The previous version divided
# live births by abortions, which contradicts the "per 1,000 Live Births"
# label used when the result is plotted.
merged_data <-
  full_join(
    clean_lb_finance,
    clean_ia_finance,
    by = "borough",
    suffix = c("_lb", "_ia")
  ) %>%
  janitor::clean_names() %>%
  mutate(
    medicaid = (medicaid_ia / medicaid_lb) * 1000,
    self_pay = (self_pay_ia / self_pay_lb) * 1000,
    other_insurance = (other_insurance_ia / other_insurance_lb) * 1000,
    not_stated = (not_stated_ia / not_stated_lb) * 1000,
    total = (total_ia / total_lb) * 1000
  ) %>%
  select(borough, medicaid, self_pay, other_insurance, not_stated, total)
Plotly bar chart: induced abortions vs. financial plans
# Grouped bar chart of the per-plan rates in merged_data: one group of bars
# per financial plan, one bar colour per borough. The overall `total` rate is
# dropped first so only the individual plans are reshaped and drawn.
abortion_finance_plot <- merged_data %>%
  select(-total) %>%
  pivot_longer(
    cols = medicaid:not_stated,
    names_to = "finance_plan",
    values_to = "abortion"
  ) %>%
  plot_ly(
    x = ~finance_plan,
    y = ~abortion,
    color = ~borough,
    type = "bar",
    colors = "viridis"
  ) %>%
  layout(
    title = "Abortions in NYC by Financial Plan",
    yaxis = list(title = "Number of Induced Abortions per 1,000 Live Births")
  )

# Print the widget so it renders when the script/chunk is run.
abortion_finance_plot